rm(list = ls(all.names = TRUE))
gc(reset=T)
rm(list=ls())

# Packages -----------------
library("nnet")
library("dplyr")
library("lme4")
library("plyr")

#packageurl<-"https://cran.r-project.org/src/contrib/Archive/nloptr/nloptr_1.2.1.tar.gz"
#install.packages(packageurl, repos=NULL, type="source")

# Input ------

args <- commandArgs(TRUE)

input_discipline <- as.character(args[1]) #Discipline

print(input_discipline)

# Set Up Data -----------------

Predicted_Year_Delta_T <- data.frame(Delta_T=rep(seq(1:5),length(seq(1990,2017))),Year=seq(1990,2017))

# Here, instead, read in COMBINED_OUTPUT_R_Ideal_NODF_Matrix_Citations.csv.gz
# and subset on input_discipline and the following
# Citation Window = 5
# Citation_Logged = Log
# Sample_Stratified = Censored20
# Citation_Deflated = Deflated
t4 <- read.csv(paste("/Path/Data/OUTPUT_R_Ideal_NODF_Matrix_Citations_Censored20_Deflated_Window5_Log_",input_discipline,".csv.gz",sep="")) %>%
  dplyr::rename(GRID=Nations)%>%
  dplyr::rename(Predicted_Value_in_Present=Value,Future_Year=Year)  %>%
  dplyr::select(-Sample_Stratified,-Citation_Deflated,-Citation_Window,-Citation_Logged,-Discipline)

# Here, instead, read in COMBINED_OUTPUT_R_NODF_Matrix_Citations.csv.gz
# and subset on input_discipline and the following
# Citation Window = 5
# Citation_Logged = Log
# Sample_Stratified = Censored20
# Citation_Deflated = Deflated
t5 <- read.csv(paste("/Path/Data/OUTPUT_R_NODF_Matrix_Citations_Censored20_Deflated_Window5_Log_",input_discipline,".csv.gz",sep="")) %>%
  dplyr::rename(GRID=Nations)%>%
  dplyr::rename(Actual_Future_Value=Value)%>%
  merge(Predicted_Year_Delta_T,.,by=c("Year"),all.x=T)%>%
  dplyr::mutate(Future_Year = Year+Delta_T)%>%
  
  dplyr::group_by(Discipline,GRID,Year)%>%
  dplyr::mutate(GRID_Sum = sum(Actual_Future_Value))%>%
  dplyr::group_by(Discipline,Topics,Year)%>%
  dplyr::mutate(Topic_Sum = sum(Actual_Future_Value))%>%
  
  merge(.,t4,by=c("GRID","Topics","Future_Year"),all.x=T)%>%
  tidyr::drop_na()


t5_GRID_Sum_Scale <- t5 %>% select("Discipline","Year","GRID_Sum") %>% unique() %>% arrange(Year) %>% group_by(Discipline,Year)  %>% mutate(GRID_Sum_Scale = scale(GRID_Sum)) %>% as.data.frame()

t5_Topic_Sum_Scale <- t5 %>% select("Discipline","Year","Topic_Sum") %>% unique() %>% arrange(Year) %>% group_by(Discipline,Year) %>% mutate(Topic_Sum_Scale = scale(Topic_Sum)) %>% as.data.frame()


# Logit Models -----------------

final_m1.coef <- data.frame()
  for(year_ in seq(1990,2017)){
    for(Delta_T_ in seq(1,5)){
      
      t5_test <- t5 %>%
         subset(Year==year_ & Delta_T==Delta_T_)
      #   merge(.,t5_GRID_Sum_Scale %>%
      #           subset(Year==year_),
      #         by=c("Discipline","Year","GRID_Sum"),all.x=T) %>% 
      #   merge(.,t5_Topic_Sum_Scale%>%
      #           subset(Year==year_),
      #         by=c("Discipline","Year","Topic_Sum"),all.x=T)
      # 
     
       test <- tryCatch(glmer(Actual_Future_Value ~ Predicted_Value_in_Present  +
                            #log(GRID_Sum)  + log(Topic_Sum) + 
                               (1|GRID) + (1|Topics), 
                             data = t5_test,
                             family = "binomial"),
                       error=function(e) NA)
      if(is.na(test)){ break } else { 
        print(paste(input_discipline,year_,sep=" "))
        print(test)}
      
      m1.coef <- data.frame(coef(summary(test)))
      m1.coef$Discipline <- input_discipline
      m1.coef$Delta_T <- Delta_T_
      m1.coef$Year <- year_
      m1.coef$Coefficients <- row.names(m1.coef)
      row.names(m1.coef) <- NULL
      final_m1.coef <- rbind(final_m1.coef,m1.coef) 
    }
  }


# Data to Plot Coefficients -----------------

colnames(final_m1.coef)[4]<-"pvalue"

# Here, instead, read in COMBINED_OUTPUT_R_NODF_Citations.csv.gz
# and subset on input_discipline and the following
# Citation Window = 5
# Citation_Logged = Log
# Sample_Stratified = Censored20
# Citation_Deflated = Deflated
nodf_scores <- read.csv(paste("/Path/Data/OUTPUT_R_NODF_Citations_Censored20_Deflated_Window5_Log_",input_discipline,".csv.gz",sep=""))

final_m1_coef_nodf <- final_m1.coef %>%
  select(-c(Discipline))%>%
  merge(.,nodf_scores%>%subset(select=c(NODF_Normed,Year,Discipline)),
        by=c("Year"))

# Write to File Coefficients Dataframe -----------------

write.csv(final_m1_coef_nodf,paste("/Path/Data/OUTPUT_R_Logit_Coefficients_BASELINE_",input_discipline,".csv",sep=""),row.names=F)
